package com.wck.car.car;


import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;


/**
 * WebMagic {@link PageProcessor} that stores the raw response text of every
 * fetched page under the {@code "repo"} result field and, when the page URL
 * carries a {@code seriesid=<value>&format...} fragment, also stores that value
 * under the {@code "seriesId"} result field.
 *
 * <p>NOTE(review): despite the class name, the configured site and the seed URL
 * in {@link #main(String[])} point at autohome.com.cn, not GitHub. The name is
 * kept because other code may reference this bean by type.
 */
@Component
public class GithubRepoPageProcessor implements PageProcessor {

    /** Query-string key that precedes the series id in a page URL. */
    private static final String SERIES_ID_KEY = "seriesid=";

    /** Marker that terminates the series id value in a page URL. */
    private static final String FORMAT_DELIMITER = "&format";

    /**
     * Crawl settings shared by every request issued for this processor.
     * The domain is a bare host name (not a URL): WebMagic uses it as the
     * cookie domain, so a value with scheme/path/query would never match.
     */
    private final Site site = Site.me().setRetryTimes(3)
            .setSleepTime(100)
            .setDomain("www.autohome.com.cn")
            .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36");

    /**
     * Publishes the page's raw text as field {@code "repo"} and, if the URL
     * contains a series id, publishes it as field {@code "seriesId"}.
     *
     * @param page the downloaded page handed over by the spider
     */
    @Override
    public void process(Page page) {
        // Debug dump of the raw response; kept as-is so existing console output is unchanged.
        System.out.println("CONTENTTTTTTTTTTTTTTTTT" + page.getRawText());

        // Extract the seriesId from the request URL, when present.
        String seriesId = extractSeriesId(page.getUrl().toString());
        if (seriesId != null) {
            page.putField("seriesId", seriesId);
        }
        page.putField("repo", page.getRawText());
    }

    /**
     * Returns the text between {@code "seriesid="} and the first following
     * {@code "&format"} in {@code url}.
     *
     * <p>The delimiter is searched for only after the key, so a URL in which
     * {@code "&format"} also (or only) appears before {@code "seriesid="} no
     * longer produces an inverted substring range.
     *
     * @param url the page URL to inspect
     * @return the series id, or {@code null} if the key is missing or is not
     *         followed by the delimiter
     */
    private static String extractSeriesId(String url) {
        int keyStart = url.indexOf(SERIES_ID_KEY);
        if (keyStart < 0) {
            return null;
        }
        int valueStart = keyStart + SERIES_ID_KEY.length();
        int valueEnd = url.indexOf(FORMAT_DELIMITER, valueStart);
        if (valueEnd < 0) {
            return null;
        }
        return url.substring(valueStart, valueEnd);
    }

    /**
     * @return the crawl settings used by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Stand-alone entry point: runs a five-thread spider with this processor
     * against a single seed URL and blocks until the crawl finishes.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Spider.create(new GithubRepoPageProcessor())
                .addUrl("https://www.autohome.com.cn/ashx/index/GetHomeFindCar.ashx?type=1&brandid=33&v=1")
                .thread(5)
                .run();
    }
}
